Code
pacman::p_load(ggplot2,readr,dplyr,lubridate,pheatmap,tmap, tidyverse,sf)March 7, 2024
March 9, 2024
1.read data
2.data caleaning
data <- read_csv("data/weather.csv", na = c("?", "�"))
data <- data %>%
dplyr::filter(Year >= 2014, Year <= 2023)
colnames(data) <- c(
'Station', 'Year', 'Month', 'Day', 'DailyRainfall',
'Highest30minRainfall', 'Highest60minRainfall', 'Highest120minRainfall',
'MeanTemperature', 'MaxTemperature', 'MinTemperature',
'MeanWindSpeed', 'MaxWindSpeed'
)
data <- data %>%
mutate(
DailyRainfall = as.numeric(DailyRainfall),
Highest30minRainfall = as.numeric(Highest30minRainfall),
Highest60minRainfall = as.numeric(Highest60minRainfall),
Highest120minRainfall = as.numeric(Highest120minRainfall),
MeanTemperature = as.numeric(MeanTemperature),
MaxTemperature = as.numeric(MaxTemperature),
MinTemperature = as.numeric(MinTemperature)
) %>%
suppressWarnings()
sum(is.na(data$DailyRainfall))[1] 8716
# A tibble: 120 × 3
# Groups: Year [10]
Year Month TotalRainfall
<dbl> <dbl> <dbl>
1 2014 1 4316.
2 2014 2 1012.
3 2014 3 7212
4 2014 4 12689.
5 2014 5 14700.
6 2014 6 9797
7 2014 7 12124.
8 2014 8 12674.
9 2014 9 7148.
10 2014 10 7821.
# ℹ 110 more rows
1.heatmap
# 首先,确保Year和Month列是因子类型,以便在热图中正确显示
monthly_rainfall$Year <- factor(monthly_rainfall$Year)
monthly_rainfall$Month <- factor(monthly_rainfall$Month,
levels = c(1:12),
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
# 创建热图
ggplot(monthly_rainfall, aes(x = Month, y = Year, fill = TotalRainfall)) +
geom_tile(color = "white") + # 使用geom_tile来创建热图的方块
scale_fill_gradient(low = "white", high = "blue") + # 定义颜色渐变范围
theme_minimal() + # 使用简约主题
labs(fill = "Total Rainfall (mm)",
title = "Monthly Rainfall Heatmap",
x = "Month",
y = "Year") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # 调整X轴文字角度,如果有需要
2.map
# A tibble: 6 × 13
Station Year Month Day DailyRainfall Highest30minRainfall
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Paya Lebar 2014 1 1 0 NA
2 Paya Lebar 2014 1 2 0 NA
3 Paya Lebar 2014 1 3 2.2 NA
4 Paya Lebar 2014 1 4 0.6 NA
5 Paya Lebar 2014 1 5 10.5 NA
6 Paya Lebar 2014 1 6 31.2 NA
# ℹ 7 more variables: Highest60minRainfall <dbl>, Highest120minRainfall <dbl>,
# MeanTemperature <dbl>, MaxTemperature <dbl>, MinTemperature <dbl>,
# MeanWindSpeed <chr>, MaxWindSpeed <chr>
# 从API获取站点数据
response <- GET("https://api.data.gov.sg/v1/environment/rainfall")
if (status_code(response) != 200) {
stop("Failed to fetch data from API")
}
# 解析站点数据
api_data <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE)
stations <- api_data$metadata$stations
# 假定stations已经是一个data.frame
stations_df <- data.frame(
StationID = stations$id,
StationName = stations$name,
Latitude = stations$location.latitude,
Longitude = stations$location.longitude,
stringsAsFactors = FALSE
)'data.frame': 62 obs. of 5 variables:
$ id : chr "S77" "S109" "S117" "S64" ...
$ device_id : chr "S77" "S109" "S117" "S64" ...
$ name : chr "Alexandra Road" "Ang Mo Kio Avenue 5" "Banyan Road" "Bukit Panjang Road" ...
$ location.latitude : num 1.29 1.38 1.26 1.38 1.32 ...
$ location.longitude: num 104 104 104 104 104 ...
yearly_rainfall_summary <- data %>%
group_by(Station, Year) %>%
summarise(AverageRainfall = mean(DailyRainfall, na.rm = TRUE), .groups = 'drop')
Station_Records <- read_excel("data/Station_Records.xlsx")
# 将年度降雨总结数据与站点记录合并
final_data <- merge(yearly_rainfall_summary, Station_Records, by = "Station", all = TRUE)
# 显示合并后的数据头部
head(final_data) Station Year AverageRainfall Latitude Longitude
1 Admiralty 2014 5.991060 1.4439 103.7854
2 Admiralty 2015 5.607671 1.4439 103.7854
3 Admiralty 2016 5.377049 1.4439 103.7854
4 Admiralty 2017 6.939649 1.4439 103.7854
5 Admiralty 2018 6.126593 1.4439 103.7854
6 Admiralty 2019 5.395616 1.4439 103.7854
# 过滤出2023年的数据
final_data_2023 <- final_data[final_data$Year == 2023, ]
# Step 1: Convert your final_data to an sf object
final_sf <- st_as_sf(final_data_2023, coords = c("Longitude", "Latitude"), crs = 4326)
# Assuming final_data is prepared with 'Longitude' and 'Latitude'
voronoi <- deldir(final_data_2023$Longitude, final_data_2023$Latitude)
# Extract the tiles and convert them into polygons
v.polygons <- tile.list(voronoi)
thiessen_polygons <- lapply(seq_along(v.polygons), function(i) {
tile <- v.polygons[[i]]
# Convert the tile coordinates to a matrix
coords <- matrix(c(tile$x, tile$y), ncol = 2, byrow = TRUE)
# Create a Polygon object
Polygon(coords)
})
# Combine all Polygons into a SpatialPolygons object
thiessen_sp <- SpatialPolygons(lapply(seq(thiessen_polygons), function(i) {
Polygons(list(thiessen_polygons[[i]]), ID = as.character(i))
}))
# Convert SpatialPolygons object to sf object
thiessen_sf <- st_as_sf(thiessen_sp, crs = st_crs(final_sf))Reading layer `MP14_SUBZONE_WEB_PL' from data source
`D:\kellyzhaokl\ISSS608-VAA\Take-home_Ex\Take-home_Ex04\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
# Assuming final_data is already loaded and filtered for the year 2023
# Transform final_sf to the same projection as mpsz (SVY21)
final_sf <- st_transform(final_sf, crs = st_crs(mpsz))
# Since your mpsz data is already read, we will directly proceed with the spatial join
# Join the rainfall data to the subzone polygons based on their location
mpsz_with_rainfall <- st_join(mpsz, final_sf, join = st_intersects)
# You may want to handle NA values or aggregation if necessary here
# Set tmap mode to interactive viewing
tmap_mode("view")
# Create an interactive map
tm <- tm_shape(mpsz_with_rainfall) +
tm_polygons(col = "AverageRainfall", palette = "RdBu",
title = "Average Rainfall in 2023 (mm)") +
tm_borders() +
tm_legend(outside = TRUE) +
tm_tiles("CartoDB.Positron") # Background tile layer
# Convert to leaflet object for further customization or display
leaflet_map <- tmap_leaflet(tm)
# Print the leaflet map to view the interactive version
leaflet_map# Create a tessellated surface
th <- dirichlet(as.ppp(final_sf)) |> st_as_sfc() |> st_as_sf()
# The dirichlet function does not carry over projection information
# requiring that this information be added manually
st_crs(th) <- st_crs(final_sf)
# The tessellated surface does not store attribute information
# from the point data layer. We'll join the point attributes to the polygons
th2 <- st_join(th, final_sf, fn=mean)
# Finally, we'll clip the tessellated surface to the Texas boundaries
th.clp <- st_intersection(th2, mpsz)tm_map <- tm_shape(th.clp) +
tm_polygons(col = "AverageRainfall", palette = "RdBu",
title = "Predicted precipitation (in inches)") +
tm_shape(final_sf) +
tm_bubbles(size = "AverageRainfall", col = "white", border.col = "gray",
alpha = 0.2, border.alpha = 0.3, border.lwd = 0.1,
popup.vars = c(Station = "Station", AverageRainfall = "AverageRainfall")) +
tm_legend(legend.outside = TRUE)
# View the map
tmap_mode("view")
tm_map